library(data.table)
library(tidyverse)
library(forecast)
theme_set(theme_bw())기출문제 확인 결과 ARIMA, SARIMA, Time series regression이 나옴.
데이터는 시계열 예제 데이터 수준이므로 분석 프로세스만 숙지하고 있으면 됨
백색잡음과정
서로 독립이고 동일한 분포를 따르는(i.i.d) 확률변수로 구성된 확률과정
뚜렷한 추세가 없음
시계열의 진폭이 일정함
이 형태 혹은 비슷한 형태를 띄어야 정상 시계열로 인식함
기본 가정이라고 생각하면 되고, 정상 시계열 형태가 아닐 경우 변환을 통해 정상 시계열로 만들어줘야함
확률보행과정
평균은 일정하지만 분산과 자기공분산은 t 시점에 의존
뚜렷한 추세가 존재함
진폭이 불규칙적
비정상 시계열의 대표적인 예시로 적절한 변환을 통해 정상 시계열로 만들어줘야함
ACF(자기상관함수)
시계열 자료 특성상 현재의 상태가 과거 및 미래의 상태와 연관되므로 시간에 따른 상관 정도를 측정하는 척도가 필요함
\[ \rho_k = \frac{cov(Z_t, Z_t+k)}{\sqrt{var(Z_k)}\sqrt{var(Z_{t+k})}} \]
Sample correlogram(표본상관도표)
X축을 시차, Y축을 표본 ACF로 표현한 그림으로 모델 인식 절차에 사용됨.
acf() 함수로 작성 가능
\(\rho_k = 0\)이면 표본 ACF는 점근적으로 평균은 0, 분산은 \(\frac{1}{n}\)인 정규분포를 따르며, 이 성질에 따라 신뢰구간이 점선으로 추가됨
표본 ACF가 점선을 벗어나 있으면 \(H_0 : \rho_k = 0, \quad k=1,2,3,\cdots\)을 기각할 수 있음
각 시점에 대해 검정하므로 다중검정의 고질적 문제인 일종 오류가 증가하는 문제가 발생함
따라서 엄격한 검정 보다는 참고자료로 이용
PACF(부분자기상관함수)
\(Z_t\)와 \(Z_{t+k}\) 사이의 직접적인 연관성을 측정하는 함수 \(Z_t, Z_{t+1}, \cdots, Z_k\)가 주어졌을 때 \(Z_t\)와 \(Z_{t+k}\)에서 \(Z_{t+1}, \cdots, Z_{t+k-1}\)의 효과를 제거한 후 상관관계를 측정함
Sample partial correlogram(표본부분상관도표)
X축을 시차, Y축을 표본 PACF로 표현한 그림으로 모델 인식 절차에 사용됨.
pacf() 함수로 작성 가능
문제점 ACF와 동일
따라서 엄격한 검정 보다는 참고자료로 이용
ACF, PACF 그래프 이용해서 모형을 인식함. 따라서 그래프를 정확히 해석하는 것이 중요함
ACF 지수적 감소
ACF 1차 시점 이후 절단(0차 시점은 자기 자신이므로 상관관계는 1, 따라서 고려 X)
이론적 그림이므로 비슷한 형태를 띄면 절단이라고 인식함. 그래프를 이용한 해석은 주관적이므로 정확한 정답은 없음. 다만 딱 봐도 절단이면 절단이라고 인식할 수 있어야 함
ACF 지수적 감소
PACF 2차 시점 이후 절단
AR(p) 차 시점 그림 또한 ACF 지수적 감소, PACF p차 시점 이후 절단으로 동일함
ACF 1차 시점 이후 절단
PACF 지수적 감소
AR모형과 ACF, PACF 그림만 반대이고, 모형 인식 방법은 동일함
AR 모형과 MA모형을 합친 형태로, AR의 p와 MA의 q를 파라미터로 갖는 모형임
그림을 추가하진 않았지만 AR, MA 모형의 지수적 감소 형태와 동일함
| Model | ACF | PACF |
|---|---|---|
| MA(q) | q 시차 이후 0으로 절단 | 지수적으로 감소 |
| AR(p) | 지수적으로 감소 | p시차 이후 0으로 절단 |
| ARMA(p, q) | 지수적으로 감소 | 지수적으로 감소 |
실제 자료에서 정상성을 만족하는 경우는 거의 없으므로 변환을 통해 정상시계열로 만들어줘야함
분산이 일정하지 않은 경우 : 분산안정화 변환 실시
분산 증가 : 로그변환, 제곱근 변환
분산 감소 : 지수변환, 제곱 변환
분산이 증가할 때
분산안정화 변환 실시
추세가 존재하는 경우 : 차분 실시
추세만 보고 명확하지 않을 수 있으므로, ACF 그래프도 같이 확인 필요
1차 차분으로 정상성을 만족하지 않을 경우 2차 차분까지 실시
3차 차분은 거의 하지 않으므로 고려 X
계절 효과가 존재하는 경우 계절형 차분 실시
추세가 존재하는 경우
1차 차분 실시
ARMA 모형에 차분(d)이 추가된 모형임
추세가 있으면 차분 실시
ACF가 천천히 감소하면 차분 실시
과대차분을 통해 차분 차수 결정, ndiffs를 이용해도 됨
ndiffs(x, alpha = 0.05, test = c(‘kpss’, ‘adf’, ‘pp’) ): 일반 차분 차수 추정
x : 시계열 자료
alpha : 유의 수준
test = “kpss” : 귀무가설 - 정상시계열, 대립가설 - 비정상시계열
test = “adf” : 귀무가설 - 비정상시계열, 대립가설 - 정상시계열
test = “pp” : 귀무가설 - 비정상시계열, 대립가설 - 정상시계열
차분을 통해 정상시계열로 만들어준 후 차수 결정
d차 차분된 자료의 ACF, PACF로 차수 결정(AR, MA, ARMA 차수 결정방법과 동일)
데이터 생성 ARIMA(1, 1, 0)에서 난수 발생
set.seed(123)
z <- arima.sim(model = list(order = c(1, 1, 0), ar = 0.5), n = 1000)
plot(z)1차 차분
ARIMA(1, 1, 0)으로 식별
ndiffs(z)## [1] 1
z_dif <- diff(z)
plot(z_dif)acf(z_dif)pacf(z_dif)forecast::ggtsdisplay(z_dif)과대 차분했을 경우 정상성은 만족하지만 AR(1)모형 보다는 더 복잡한 모형으로 인식함. 모수 간결의 원칙에 따라 과대차분 모형 제외
z_diff <- diff(z, d = 2)
plot(z_diff)acf(z_diff)pacf(z_diff)forecast::ggtsdisplay(z_diff)시계열 정상화 단계
등분산 확인 및 필요시 변환
최적의 차분 차수 결정
ACF 형태, 단위근 검정 결과로 판단
과대차분 여부 확인(시험 때 굳이 안해도 될 듯)
SACF를 절단으로 인식 : MA 모형
SPACF를 절단으로 인식: AR 모형
SACF, SPACF 모두 감소로 인식 : ARMA 모형
후보 모형이 많을 때 AIC, BIC가 최소가 되는 모형 선택
보통 후보 모형 2개 정도로 시작
과대 적합 확인할 때 모형이 어차피 늘어나므로 너무 많은 모형을 선택 안해도 됨
arima() 함수의 경우 차분을 실시한 자료에 대한 절편 포함 여부 검정 x
forecast 패키지, Arima()를 이용
Arima(x, order = c(0, 0, 0), include.drift = F, fixed = Null)
x : 시계열 자료
order = c(0, 0, 0) : Arima(p, d, q) 차수 결정
include.drift : d=1의 자료에 대하여 절편 포함 여부
fixed : 비유의적 모수 제거
조건부 최소제곱 추정법
비조건부 최소제곱 추정법
최대가능도 추정법
대부분의 경우 추정 결과에 차이 X, 패키지 이용할 것이므로 고려 X
모형 식별과 모수 추정을 통해 얻어진 잠정 모형의 타당성 여부 판단
잔차분석
과대적합
잠정모형에 모수를 추가한 모형의 유의성을 확인하는 분석 진행
잠정모형이 적절한 모형이라면 추가된 모수들은 유의하지 않게 나올 것이기 때문
잠정모형에 하나의 AR항과 MA항을 각각 추가하여 두개의 과대적합모형 설정
#ex8_2a <- fread("data/timeseries/ex8_2a.txt")
ex_7_5d <- scan("data/timeseries/ex7_5d.txt")
ex_7_5d %>% glimpse()## num [1:100] 15 34 18 17 16 26 26 28 34 30 ...
ex_7_5d.ts <- ts(ex_7_5d)
plot(ex_7_5d.ts)acf(ex_7_5d.ts)ggtsdisplay(ex_7_5d.ts)adf test 실시
adf test 귀무가설은 비정상 시계열
귀무가설을 기각할 수 없으므로, 차분 필요
ndiffs 함수를 이용해서 최적 차분 차수 결정
library(tseries)## Warning: package 'tseries' was built under R version 4.0.5
adf.test(ex_7_5d.ts)##
## Augmented Dickey-Fuller Test
##
## data: ex_7_5d.ts
## Dickey-Fuller = -2.1037, Lag order = 4, p-value = 0.5337
## alternative hypothesis: stationary
ndiffs(ex_7_5d.ts)## [1] 1
ndiffs(ex_7_5d.ts, test = "adf")## [1] 1
원자료 확률보행과정
차분 이후 백색잡음과정
ex_7_5d_diff <- diff(ex_7_5d.ts, d = 1)
acf(ex_7_5d_diff)pacf(ex_7_5d_diff)ggtsdisplay(ex_7_5d_diff)후보모형
ARIMA(0, 1, 2)
ARIMA(2, 1, 2) : 하위 모형 다 선택
ARIMA(1, 1, 1)
ARIMA(1, 1, 2)
ARIMA(2, 1, 1)
절편 포함 여부 및 모수 유의성 확인
fit1 <- Arima(ex_7_5d.ts, order = c(0, 1, 2), include.drift = T)
confint(fit1)## 2.5 % 97.5 %
## ma1 -0.7716288 -0.28352060
## ma2 -0.4260835 0.04691902
## drift 1.2321333 2.11047020
비유의한 모수 빼고 다시 적합
fit1.1 <- Arima(ex_7_5d.ts, order = c(0, 1, 2), include.drift = T,
fixed = c(NA, 0, NA))
confint(fit1.1)## 2.5 % 97.5 %
## ma1 -0.832967 -0.5721323
## ma2 NA NA
## drift 1.198877 2.1251634
fit1.2 <- Arima(ex_7_5d.ts, order = c(0, 1, 1), include.drift = T)
confint(fit1.2)## 2.5 % 97.5 %
## ma1 -0.832967 -0.5721323
## drift 1.198877 2.1251634
# 둘다 결과는 같지만 예를 들어 MA1을 지워야할 경우 두번째 방식은 안되고, 첫번째 방식으로 해야됨 모형 검진
checkresiduals(fit1.1)##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2) with drift
## Q* = 9.5841, df = 7, p-value = 0.2134
##
## Model df: 3. Total lags used: 10
과대적합 결과 확인
confint(Arima(ex_7_5d.ts, order = c(0, 1, 2), include.drift = T))## 2.5 % 97.5 %
## ma1 -0.7716288 -0.28352060
## ma2 -0.4260835 0.04691902
## drift 1.2321333 2.11047020
confint(Arima(ex_7_5d.ts, order = c(1, 1, 1), include.drift = T))## 2.5 % 97.5 %
## ar1 -0.1078336 0.3862985
## ma1 -0.8935299 -0.6123492
## drift 1.2201540 2.1136465
Arima(ex_7_5d.ts, order = c(1, 1, 1), include.drift = T)$aic## [1] 692.855
Arima(ex_7_5d.ts, order = c(1, 1, 2), include.drift = T)$aic## [1] 690.4239
Arima(ex_7_5d.ts, order = c(2, 1, 1), include.drift = T)$aic # 후보 모형## [1] 689.7133
Arima(ex_7_5d.ts, order = c(2, 1, 2), include.drift = T)$aic ## [1] 691.1555
Arima(ex_7_5d.ts, order = c(1, 1, 1), include.drift = T)$bic## [1] 703.2355
Arima(ex_7_5d.ts, order = c(1, 1, 2), include.drift = T)$bic## [1] 703.3995
Arima(ex_7_5d.ts, order = c(2, 1, 1), include.drift = T)$bic # 후보 모형 ## [1] 702.6889
Arima(ex_7_5d.ts, order = c(2, 1, 2), include.drift = T)$bic ## [1] 706.7262
fit2 <- Arima(ex_7_5d.ts, order = c(2, 1, 1), include.drift = T) 절편 포함 여부 및 모수 유의성 확인
confint(fit2)## 2.5 % 97.5 %
## ar1 -0.2201178 0.30250218
## ar2 -0.4981750 -0.05109239
## ma1 -0.8393299 -0.40694244
## drift 1.2129095 2.13080659
비유의한 모수 제외 후 다시 적합
fit2.1 <- Arima(ex_7_5d.ts, order = c(2, 1, 1), include.drift = T,
fixed = c(0, NA, NA, NA))
confint(fit2.1)## 2.5 % 97.5 %
## ar1 NA NA
## ar2 -0.4961900 -0.07586089
## ma1 -0.7652044 -0.43233015
## drift 1.2024243 2.13873965
모형 검진
checkresiduals(fit2.1)##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1) with drift
## Q* = 4.7351, df = 6, p-value = 0.5782
##
## Model df: 4. Total lags used: 10
과대적합 결과 확인
confint(Arima(ex_7_5d.ts, order = c(3, 1, 1), include.drift = T,
fixed = c(0, NA, NA, NA, NA))) # ar3 비유의 ## 2.5 % 97.5 %
## ar1 NA NA
## ar2 -0.4978847 -0.07691048
## ar3 -0.2146449 0.18012631
## ma1 -0.7651125 -0.42556878
## drift 1.2060652 2.13716849
confint(Arima(ex_7_5d.ts, order = c(2, 1, 2), include.drift = T,
fixed = c(0, NA, NA, NA, NA))) # ma2 비유의 ## 2.5 % 97.5 %
## ar1 NA NA
## ar2 -0.5095385 -0.02199123
## ma1 -0.7832922 -0.37066219
## ma2 -0.2623775 0.18707884
## drift 1.2139814 2.12895114
auto.arima()
aic를 최소로하는 모형 탐색이 default
bic를 최소로 하는 모형을 탐색하고자 하는 경우 ( ic = "bic) 추가
ARIMA(0, 1, 3) 잠정 모형 선택
auto.arima(ex_7_5d.ts)## Series: ex_7_5d.ts
## ARIMA(0,1,3) with drift
##
## Coefficients:
## ma1 ma2 ma3 drift
## -0.6022 -0.3651 0.2979 1.6658
## s.e. 0.1045 0.1118 0.1172 0.2483
##
## sigma^2 estimated as 56.61: log likelihood=-338.71
## AIC=687.43 AICc=688.07 BIC=700.4
auto.arima(ex_7_5d.ts, ic = "bic")## Series: ex_7_5d.ts
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## -0.7025 1.6620
## s.e. 0.0665 0.2363
##
## sigma^2 estimated as 60.7: log likelihood=-343.05
## AIC=692.1 AICc=692.35 BIC=699.89
fit3 <- Arima(ex_7_5d.ts, order = c(0, 1, 3), include.drift = T)절편 포함 여부 및 모수 유의성 확인
confint(fit3)## 2.5 % 97.5 %
## ma1 -0.80694420 -0.3974247
## ma2 -0.58417343 -0.1460184
## ma3 0.06824973 0.5276037
## drift 1.17912663 2.1523963
모형 검진
checkresiduals(fit3)##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,3) with drift
## Q* = 3.3393, df = 6, p-value = 0.7652
##
## Model df: 4. Total lags used: 10
과대적합 결과 확인
confint(Arima(ex_7_5d.ts, order = c(1, 1, 3), include.drift = T)) ## 2.5 % 97.5 %
## ar1 0.007084997 0.8608423
## ma1 -1.443433477 -0.6033568
## ma2 -0.553314783 0.2881898
## ma3 0.170452848 0.6200290
## drift 1.062203337 2.2736501
confint(Arima(ex_7_5d.ts, order = c(0, 1, 4), include.drift = T))## 2.5 % 97.5 %
## ma1 -0.79629657 -0.4023714
## ma2 -0.65444266 -0.1494722
## ma3 0.02785128 0.4992412
## ma4 -0.11249099 0.3089903
## drift 1.14473771 2.1930321
절편 포함 여부 및 모수 유의성 확인
fit4 <- Arima(ex_7_5d.ts, order = c(1, 1, 3), include.drift = T,
fixed = c(NA, NA, 0, NA, NA))
confint(fit4)## 2.5 % 97.5 %
## ar1 0.2465178 0.7894623
## ma1 -1.2980385 -0.9668025
## ma2 NA NA
## ma3 0.1933871 0.4998636
## drift 1.0300466 2.3026748모형 검진
checkresiduals(fit4)##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,3) with drift
## Q* = 2.1061, df = 5, p-value = 0.8343
##
## Model df: 5. Total lags used: 10
과대적합 결과 확인
confint(Arima(ex_7_5d.ts, order = c(2, 1, 3), include.drift = T,
fixed = c(NA, NA, NA, 0, NA, NA)))## 2.5 % 97.5 %
## ar1 0.2300082 0.8010240
## ar2 -0.2982268 0.2019625
## ma1 -1.3220668 -0.9064129
## ma2 NA NA
## ma3 0.1765349 0.5109311
## drift 1.0520675 2.2877721
confint(Arima(ex_7_5d.ts, order = c(1, 1, 4), include.drift = T,
fixed = c(NA, NA, 0, NA, NA, NA)))## 2.5 % 97.5 %
## ar1 0.20357686 0.9750608
## ma1 -1.48257305 -0.9050508
## ma2 NA NA
## ma3 0.03500506 0.8576986
## ma4 -0.35740462 0.2078702
## drift 1.04260684 2.2813648
ARIMA(1, 1, 3) 모형 최종 모형으로 선택
c(fit1.1$aic, fit2.1$aic, fit4$aic)## [1] 692.1001 687.8079 685.9708
c(fit1.1$aicc, fit2.1$aicc, fit4$aicc)## [1] 692.3527 688.2334 686.6160
c(fit1.1$bic, fit2.1$bic, fit4$bic)## [1] 699.8854 698.1884 698.9464
summary(forecast(fit4))##
## Forecast method: ARIMA(1,1,3) with drift
##
## Model Information:
## Series: ex_7_5d.ts
## ARIMA(1,1,3) with drift
##
## Coefficients:
## ar1 ma1 ma2 ma3 drift
## 0.5180 -1.1324 0 0.3466 1.6664
## s.e. 0.1385 0.0845 0 0.0782 0.3247
##
## sigma^2 estimated as 55.68: log likelihood=-337.99
## AIC=685.97 AICc=686.62 BIC=698.95
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01229316 7.272843 5.840784 -0.4131836 7.32194 0.8132738
## ACF1
## Training set 0.006767061
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 101 180.4213 170.8586 189.9839 165.7965 195.0461
## 102 183.1746 172.9258 193.4235 167.5004 198.8489
## 103 185.8045 175.5355 196.0735 170.0994 201.5096
## 104 187.9699 177.4283 198.5116 171.8479 204.0919
## 105 189.8948 178.8541 200.9355 173.0095 206.7802
## 106 191.6951 180.0354 203.3548 173.8631 209.5271
## 107 193.4308 181.1073 205.7543 174.5836 212.2780
## 108 195.1331 182.1407 208.1255 175.2629 215.0033
## 109 196.8181 183.1698 210.4664 175.9448 217.6913
## 110 198.4941 184.2102 212.7780 176.6487 220.3394
plot(forecast(fit4))ARIMA 모형에 계절 요소가 반영된 모형
library(forecast)
model <- Arima(ts(rnorm(100),freq=12), order=c(0,0,0), seasonal=c(1,0,0),
fixed=c(Phi=0.8, Theta=-0.8))
foo <- simulate(model, nsim=1000)
ggtsdisplay(foo)강한 계절요소로 인해 계절 추세가 존재하는 경우
비계절형 ARIMA 요소, 계절형 ARIMA 요소를 모두 갖고 있는 모형
모형 적합 순서
차분 차수 결정
일반 차분 실시 : ACF가 시차 1, 2, …에서 매우 천천히 감소하는 경우
계절 차분 실시 : ACF가 시차 s, 2s, … 에서 매우 천천히 감소하는 경우
일반적인 경우 d + D는 2이하로 함
ndiffs(x) : 일반 차분 차수 추정
nsdiffs(x, m = 계절 주기) : 계절차분 차수 추정
AR(p, P), MA(q, Q) 차수 결정
비계절형 차수 p, q : ACF, PACF를 이용한 일반적인 모형 인식 방법 사용
계절형 차수 P, Q : ACF, PACF에서 시차 s, 2s, 3s, …을 대상으로 인식
절편 포함 여부 결정
이전과 동일하게 신뢰구간을 이용해서 절편 포함 여부 결정
2번 차분을 한 경우 절편은 모형에 포함 x
1984.1 ~ 1988.12 백화점 매출액 arima 모형 적합하고 12 선행 시차에 대해 예측 실시
depart <- scan("data/timeseries/depart.txt")
depart.ts <- ts(depart, start = c(1984), frequency = 12)
plot(depart.ts)lndepart <- log(depart.ts)
plot(lndepart)library(forecast)
ggseasonplot(lndepart) ggseasonplot(lndepart,polar=TRUE)Acf(lndepart, lag.max = 48)ndiffs(lndepart)## [1] 1
nsdiffs(lndepart)## [1] 1
d=1의 경우
추가적인 계절 차분이 필요해보임
lndepart_1 <- diff(lndepart)
ggtsdisplay(lndepart_1,lag.max=48)D=1의 경우
lndepart_12 <- diff(lndepart,lag=12)
ggtsdisplay(lndepart_12,lag.max=48)d=1, D=1의 경우 - 정상성 확보 - 최적의 차분 : d = 1, D = 1
lndepart_1_12 <- diff(lndepart_1,lag=12)
ggtsdisplay(lndepart_1_12,lag.max=48)ggtsdisplay(lndepart_1_12, lag.max = 48)fit1 <- Arima(lndepart, order=c(0,1,1), seasonal=list(order=c(0,1,0),period=12))
confint(fit1) # 모든 모수 유의 ## 2.5 % 97.5 %
## ma1 -0.7834866 -0.3430529
checkresiduals(fit1) # 모형 가정 만족 ##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(0,1,0)[12]
## Q* = 14.769, df = 11, p-value = 0.1933
##
## Model df: 1. Total lags used: 12
과대적합
confint(Arima(lndepart,order=c(1,1,1), seasonal=list(order=c(0,1,0),period=12)))## 2.5 % 97.5 %
## ar1 -0.4675603 0.3998847
## ma1 -0.8663890 -0.2253052
confint(Arima(lndepart,order=c(0,1,2), seasonal=list(order=c(0,1,0),period=12)))## 2.5 % 97.5 %
## ma1 -0.9562689 -0.2298099
## ma2 -0.2958022 0.3652605
fit2 <- Arima(lndepart,order=c(2,1,0), seasonal=list(order=c(0,1,0),period=12))
confint(fit2) # 모든 모수 유의 ## 2.5 % 97.5 %
## ar1 -0.8157177 -0.23808436
## ar2 -0.6214768 -0.05006133
checkresiduals(fit2) # 가정 만족 ##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0)(0,1,0)[12]
## Q* = 17.7, df = 10, p-value = 0.06025
##
## Model df: 2. Total lags used: 12
과대 적합
confint(Arima(lndepart,order=c(3,1,0), seasonal=list(order=c(0,1,0),period=12)))## 2.5 % 97.5 %
## ar1 -0.8739910 -0.26541395
## ar2 -0.7344992 -0.07719871
## ar3 -0.4476723 0.18256271
confint(Arima(lndepart,order=c(2,1,1), seasonal=list(order=c(0,1,0),period=12)))## 2.5 % 97.5 %
## ar1 -0.8095983 0.3742461
## ar2 -0.6154847 0.1764711
## ma1 -0.9264064 0.2084493
fit3 <- auto.arima(lndepart, d = 1, D = 1)
fit3## Series: lndepart
## ARIMA(0,1,1)(0,1,1)[12]
##
## Coefficients:
## ma1 sma1
## -0.5840 -0.4159
## s.e. 0.1093 0.1946
##
## sigma^2 estimated as 0.0005401: log likelihood=110.29
## AIC=-214.59 AICc=-214.03 BIC=-209.04
confint(fit3)## 2.5 % 97.5 %
## ma1 -0.7982641 -0.36981376
## sma1 -0.7973317 -0.03443859
checkresiduals(fit3)##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(0,1,1)[12]
## Q* = 12.817, df = 10, p-value = 0.2341
##
## Model df: 2. Total lags used: 12
과대 적합
confint(Arima(lndepart,order=c(1,1,1), seasonal=list(order=c(0,1,1),period=12)))## 2.5 % 97.5 %
## ar1 -0.4596994 0.38086082
## ma1 -0.8673157 -0.26263265
## sma1 -0.7964791 -0.03491842
confint(Arima(lndepart,order=c(0,1,2), seasonal=list(order=c(0,1,1),period=12)))## 2.5 % 97.5 %
## ma1 -1.0150960 -0.24179869
## ma2 -0.3098821 0.41114957
## sma1 -0.7958091 -0.03618881
c(fit1$aic, fit2$aic, fit3$aic)## [1] -212.4561 -210.6719 -214.5855
c(fit1$bic, fit2$bic, fit3$bic)## [1] -208.7558 -205.1215 -209.0350
# 최종 모형 fit3fit3_1 <- Arima(depart.ts,order=c(0,1,1), seasonal=list(order=c(0,1,1), period=12),
lambda=0)
fit3_1## Series: depart.ts
## ARIMA(0,1,1)(0,1,1)[12]
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ma1 sma1
## -0.5840 -0.4159
## s.e. 0.1093 0.1946
##
## sigma^2 estimated as 0.0005401: log likelihood=110.29
## AIC=-214.59 AICc=-214.03 BIC=-209.04
fit3## Series: lndepart
## ARIMA(0,1,1)(0,1,1)[12]
##
## Coefficients:
## ma1 sma1
## -0.5840 -0.4159
## s.e. 0.1093 0.1946
##
## sigma^2 estimated as 0.0005401: log likelihood=110.29
## AIC=-214.59 AICc=-214.03 BIC=-209.04
forecast(fit3_1, h=12, level=95)## Point Forecast Lo 95 Hi 95
## Jan 1989 843.2496 805.6917 882.5584
## Feb 1989 861.0162 819.5607 904.5686
## Mar 1989 1196.5509 1134.9464 1261.4993
## Apr 1989 1094.6421 1034.8703 1157.8662
## May 1989 1008.7262 950.6838 1070.3124
## Jun 1989 1054.5252 990.9145 1122.2193
## Jul 1989 1512.3735 1417.1408 1614.0059
## Aug 1989 1025.0112 957.8737 1096.8544
## Sep 1989 976.3806 910.0591 1047.5353
## Oct 1989 1229.0466 1142.6938 1321.9250
## Nov 1989 1235.2486 1145.6798 1331.8199
## Dec 1989 2690.5489 2489.6022 2907.7149
plot(forecast(fit3_1, h=12, level=95))summary(forecast(fit3_1,h=12,level=95))##
## Forecast method: ARIMA(0,1,1)(0,1,1)[12]
##
## Model Information:
## Series: depart.ts
## ARIMA(0,1,1)(0,1,1)[12]
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ma1 sma1
## -0.5840 -0.4159
## s.e. 0.1093 0.1946
##
## sigma^2 estimated as 0.0005401: log likelihood=110.29
## AIC=-214.59 AICc=-214.03 BIC=-209.04
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -1.937472 16.9307 11.60509 -0.200563 1.36766 0.1084166 0.04358066
##
## Forecasts:
## Point Forecast Lo 95 Hi 95
## Jan 1989 843.2496 805.6917 882.5584
## Feb 1989 861.0162 819.5607 904.5686
## Mar 1989 1196.5509 1134.9464 1261.4993
## Apr 1989 1094.6421 1034.8703 1157.8662
## May 1989 1008.7262 950.6838 1070.3124
## Jun 1989 1054.5252 990.9145 1122.2193
## Jul 1989 1512.3735 1417.1408 1614.0059
## Aug 1989 1025.0112 957.8737 1096.8544
## Sep 1989 976.3806 910.0591 1047.5353
## Oct 1989 1229.0466 1142.6938 1321.9250
## Nov 1989 1235.2486 1145.6798 1331.8199
## Dec 1989 2690.5489 2489.6022 2907.7149
tour <- scan('data/timeseries/tourist.txt')
tour.ts <- ts(tour, start = 1981, frequency = 12)
plot(tour.ts)추세 및 계절 변동이 비교적 규칙적인 경우 추세와 계절 변동을 회귀모형으로 설명
일반 화귀모형은 오차의 독립성을 가정함
시계열 자료 특성상 서로 독립인 오차를 가정하는 것은 불가능
오차 사이의 상관관계, 즉 자기상관을 설명하기 위한 추후 조치가 필요
추세 성분만 있는 모형
\(Z_t = \beta_0 + \beta_1t + \cdots + \beta_pt^p + \epsilon_t\)
계절 성분만 있는 모형
\(Z_t = \sum_{i = 1}^s \beta_iD_{t,i} + \epsilon_t, \quad D_{t,i} = 1, \, t=i(\text{mod s})\)
example
1차 추세와 계절 성분이 함께 있는 모형
\(Z_t = \beta_1t + \sum_{i = 1}^{s+1} \beta_iD_{t,i} + \epsilon_t, \quad D_{t,i} = 1, \, t=i(\text{mod s})\)
시계열 자료와 같이 오차가 서로 독립이 아닌 경우의 모형
\(Z_t = \beta_1t + \sum_{i=2}^{s+1} \beta_iD_{t,i} + \epsilon_t, \quad \epsilon_t \sim ARMA(p,q)\)
잔차 시계열 그림
잔차 Q-QPLOT or 히스토그램
잔차
ACF
Ljung Box test
Durbin-watson test : 오차항의 1차 자기상관 존재 여부에 대한 통계적 검정 (Ljung box test가 더 포괄적이므로 생략)
pop <- scan('data/timeseries/pop.txt')
pop <- round(pop/10000)
pop.ts <- ts(pop, start = 1960, freq = 1)
plot(pop.ts, ylab = "population")변수 t 생성
Time <- time(pop.ts)
fit1 <- lm(pop.ts~Time)
summary(fit1)##
## Call:
## lm(formula = pop.ts ~ Time)
##
## Residuals:
## Min 1Q Median 3Q Max
## -115.40 -48.30 16.87 54.37 63.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.091e+05 1.868e+03 -58.43 <2e-16 ***
## Time 5.701e+01 9.444e-01 60.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 58.87 on 34 degrees of freedom
## Multiple R-squared: 0.9908, Adjusted R-squared: 0.9905
## F-statistic: 3644 on 1 and 34 DF, p-value: < 2.2e-16
checkresiduals(fit1)##
## Breusch-Godfrey test for serial correlation of order up to 7
##
## data: Residuals
## LM test = 31.036, df = 7, p-value = 6.124e-05
fit2 <- lm(pop.ts ~ Time + I(Time^2))
summary(fit2)##
## Call:
## lm(formula = pop.ts ~ Time + I(Time^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.365 -4.779 -1.049 3.798 17.631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.410e+06 5.185e+04 -46.49 <2e-16 ***
## Time 2.384e+03 5.244e+01 45.47 <2e-16 ***
## I(Time^2) -5.885e-01 1.326e-02 -44.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.67 on 33 degrees of freedom
## Multiple R-squared: 0.9998, Adjusted R-squared: 0.9998
## F-statistic: 1.083e+05 on 2 and 33 DF, p-value: < 2.2e-16
checkresiduals(fit2)##
## Breusch-Godfrey test for serial correlation of order up to 7
##
## data: Residuals
## LM test = 34.402, df = 7, p-value = 1.448e-05
fit3 <- lm(log(pop.ts)~Time + I(Time^2))
summary(fit3)##
## Call:
## lm(formula = log(pop.ts) ~ Time + I(Time^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.009306 -0.003520 -0.000374 0.003284 0.010159
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.199e+03 3.016e+01 -39.75 <2e-16 ***
## Time 1.204e+00 3.050e-02 39.49 <2e-16 ***
## I(Time^2) -3.004e-04 7.712e-06 -38.95 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004461 on 33 degrees of freedom
## Multiple R-squared: 0.9994, Adjusted R-squared: 0.9993
## F-statistic: 2.664e+04 on 2 and 33 DF, p-value: < 2.2e-16
checkresiduals(fit3)##
## Breusch-Godfrey test for serial correlation of order up to 7
##
## data: Residuals
## LM test = 27.984, df = 7, p-value = 0.0002213
resid <- ts(fit3$resid, start = 1960)
ggtsdisplay(resid)fit_r1 <- Arima(resid, order = c(1, 0, 0), include.mean = F)
checkresiduals(fit_r1) # 가정 위반 ##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0) with zero mean
## Q* = 30.042, df = 6, p-value = 3.86e-05
##
## Model df: 1. Total lags used: 7
fit_r2 <- Arima(resid, order = c(2, 0, 0), include.mean = F)
fit_r3 <- Arima(resid, order = c(1, 0, 1), include.mean = F)
confint(fit_r2) # 추가된 모수 유의 ## 2.5 % 97.5 %
## ar1 1.770756 1.9740120
## ar2 -1.038370 -0.8609524
confint(fit_r3) # 추가된 모수 유의 ## 2.5 % 97.5 %
## ar1 0.8861058 1.049141
## ma1 0.5594046 0.910749
checkresiduals(fit_r2) # AR(2) 가정 만족 ##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 3.0411, df = 5, p-value = 0.6936
##
## Model df: 2. Total lags used: 7
checkresiduals(fit_r3) # 가정 위반 ##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,1) with zero mean
## Q* = 16.314, df = 5, p-value = 0.006004
##
## Model df: 2. Total lags used: 7
confint(Arima(resid, order = c(2, 0, 1), include.mean = F)) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 1.7436707 1.9825692
## ar2 -1.0500424 -0.8288453
## ma1 -0.2178853 0.4054730
confint(Arima(resid, order = c(3, 0, 0), include.mean = F)) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 1.6411158 2.3518888
## ar2 -1.8677351 -0.5185865
## ar3 -0.2308685 0.4937741
fit_x <- model.matrix(fit3)[,-1]
f1 <- Arima(pop.ts, order = c(2, 0, 0), xreg = fit_x, lambda = 0)
f1## Series: pop.ts
## Regression with ARIMA(2,0,0) errors
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ar1 ar2 intercept Time I(Time^2)
## 1.8665 -0.9234 -1183.5728 1.1886 -3e-04
## s.e. 0.0611 0.0581 8.2792 0.0087 0e+00
##
## sigma^2 estimated as 6.179e-07: log likelihood=205.62
## AIC=-399.24 AICc=-396.34 BIC=-389.73
checkresiduals(f1) # 가정 만족 ##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,0) errors
## Q* = 3.0095, df = 3, p-value = 0.3902
##
## Model df: 5. Total lags used: 8
new_x <- time(ts(start = 1996, end = 2000))
fore_1 <- forecast(f1, xreg = cbind(new_x, new_x^2)) # xreg = 모델의 x인 t, t^2를 만듦 ## Warning in forecast.forecast_ARIMA(f1, xreg = cbind(new_x, new_x^2)): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
plot(fore_1)accuracy(fore_1)## ME RMSE MAE MPE MAPE MASE
## Training set 0.05364656 2.621428 1.902829 0.002791545 0.05456001 0.03316684
## ACF1
## Training set 0.1606469
depart <- scan("data/timeseries/depart.txt")
depart.ts <- ts(depart, start = c(1984), frequency = 12)plot(depart.ts)lndepart <- log(depart.ts)
plot(lndepart)Time <- time(lndepart) # 추세 변수 생성
Month <- cycle(lndepart) # 계절 추세 변수 생성 fit1 <- lm(lndepart ~ Time + factor(Month) + 0) # 0 : 절편 제거
summary(fit1) # 지시변수 중 비유의한 것 있어도 제외 x ##
## Call:
## lm(formula = lndepart ~ Time + factor(Month) + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.038679 -0.018689 -0.001468 0.015185 0.057288
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Time 0.12792 0.00231 55.39 <2e-16 ***
## factor(Month)1 -247.72500 4.58685 -54.01 <2e-16 ***
## factor(Month)2 -247.70839 4.58705 -54.00 <2e-16 ***
## factor(Month)3 -247.40807 4.58724 -53.93 <2e-16 ***
## factor(Month)4 -247.49384 4.58743 -53.95 <2e-16 ***
## factor(Month)5 -247.57595 4.58762 -53.97 <2e-16 ***
## factor(Month)6 -247.56941 4.58782 -53.96 <2e-16 ***
## factor(Month)7 -247.20068 4.58801 -53.88 <2e-16 ***
## factor(Month)8 -247.60491 4.58820 -53.97 <2e-16 ***
## factor(Month)9 -247.68907 4.58839 -53.98 <2e-16 ***
## factor(Month)10 -247.45574 4.58859 -53.93 <2e-16 ***
## factor(Month)11 -247.44748 4.58878 -53.92 <2e-16 ***
## factor(Month)12 -246.67871 4.58897 -53.76 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0253 on 47 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 3.199e+05 on 13 and 47 DF, p-value: < 2.2e-16
checkresiduals(fit1)##
## Breusch-Godfrey test for serial correlation of order up to 16
##
## data: Residuals
## LM test = 36.542, df = 16, p-value = 0.002432
resid <- fit1$residuals
ggtsdisplay(resid) confint(Arima(resid, order = c(3, 0, 0), include.mean = F)) # ar2 비유의 ## 2.5 % 97.5 %
## ar1 0.1097876 0.5924598
## ar2 -0.2190874 0.3184025
## ar3 0.1368731 0.6274369
fit_r1 <- Arima(resid, order = c(3, 0, 0), include.mean = F,
fixed = c(NA, 0, NA))
confint(fit_r1)## 2.5 % 97.5 %
## ar1 0.1576378 0.5859797
## ar2 NA NA
## ar3 0.1862973 0.6202480
checkresiduals(fit_r1, lag.max = 48)##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,0,0) with zero mean
## Q* = 8.2247, df = 7, p-value = 0.3132
##
## Model df: 3. Total lags used: 10
confint(Arima(resid, order = c(3, 0, 1), include.mean = F, fixed = c(NA, 0, NA, NA)))## 2.5 % 97.5 %
## ar1 -0.16881705 0.9289584
## ar2 NA NA
## ar3 0.06583327 0.7321927
## ma1 -0.70846193 0.6855687
confint(Arima(resid, order = c(4, 0, 0), include.mean = F, fixed = c(NA, 0, NA, NA)))## 2.5 % 97.5 %
## ar1 0.1474370 0.6341337
## ar2 NA NA
## ar3 0.1787318 0.6634399
## ar4 -0.3304956 0.2376827
fit_x <- model.matrix(fit1)
fit2 <- Arima(depart.ts, order = c(3, 0, 0), xreg = fit_x, include.mean = F,
fixed = c(NA, 0, NA, rep(NA, 13)), lambda = 0)
coef(fit2)## ar1 ar2 ar3 Time factor(Month)1
## 0.3721191 0.0000000 0.4145015 0.1302350 -252.3139317
## factor(Month)2 factor(Month)3 factor(Month)4 factor(Month)5 factor(Month)6
## -252.2990950 -251.9993784 -252.0846335 -252.1672680 -252.1617338
## factor(Month)7 factor(Month)8 factor(Month)9 factor(Month)10 factor(Month)11
## -251.7928366 -252.1966632 -252.2827161 -252.0496915 -252.0385725
## factor(Month)12
## -251.2736958
confint(fit2) # 모든 모수 유의 ## 2.5 % 97.5 %
## ar1 4.205796e-02 0.7021802
## ar2 NA NA
## ar3 1.538731e-03 0.8274642
## Time 1.202352e-01 0.1402348
## factor(Month)1 -2.720167e+02 -232.6111917
## factor(Month)2 -2.720015e+02 -232.5967039
## factor(Month)3 -2.717021e+02 -232.2966492
## factor(Month)4 -2.717866e+02 -232.3826948
## factor(Month)5 -2.718691e+02 -232.4654801
## factor(Month)6 -2.718640e+02 -232.4594577
## factor(Month)7 -2.714952e+02 -232.0905124
## factor(Month)8 -2.718991e+02 -232.4942177
## factor(Month)9 -2.719860e+02 -232.5794706
## factor(Month)10 -2.717536e+02 -232.3458081
## factor(Month)11 -2.717428e+02 -232.3343937
## factor(Month)12 -2.709789e+02 -231.5684639
checkresiduals(fit2, lag.max = 48)##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(3,0,0) errors
## Q* = 19.109, df = 3, p-value = 0.0002596
##
## Model df: 16. Total lags used: 19
new_t <- time(ts(start = c(1989,1), end = c(1989, 12), freq =12))
new_x <- cbind(new_t, diag(rep(1, 12)))
fore_2 <- forecast(fit2, xreg = new_x)## Warning in forecast.forecast_ARIMA(fit2, xreg = new_x): xreg contains different
## column names from the xreg used in training. Please check that the regressors
## are in the same order.
plot(fore_2)accuracy(fore_2)## ME RMSE MAE MPE MAPE MASE
## Training set 0.1605952 14.28129 10.34678 0.01541014 1.25833 0.0966612
## ACF1
## Training set 0.06256301
AP <- AirPassengers
AP## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 264 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 317 313 318 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 422 465 467 404 347 305 336
## 1958 340 318 362 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 405
## 1960 417 391 419 461 472 535 622 606 508 461 390 432
AP %>% glimpse()## Time-Series [1:144] from 1949 to 1961: 112 118 132 129 121 135 148 148 136 119 ...
class(AP)## [1] "ts"
start(AP)## [1] 1949 1
end(AP)## [1] 1960 12
frequency(AP)## [1] 12
plot(AP)cycle(AP) # 각 자료의 해당 월을 추출 ## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 1 2 3 4 5 6 7 8 9 10 11 12
## 1950 1 2 3 4 5 6 7 8 9 10 11 12
## 1951 1 2 3 4 5 6 7 8 9 10 11 12
## 1952 1 2 3 4 5 6 7 8 9 10 11 12
## 1953 1 2 3 4 5 6 7 8 9 10 11 12
## 1954 1 2 3 4 5 6 7 8 9 10 11 12
## 1955 1 2 3 4 5 6 7 8 9 10 11 12
## 1956 1 2 3 4 5 6 7 8 9 10 11 12
## 1957 1 2 3 4 5 6 7 8 9 10 11 12
## 1958 1 2 3 4 5 6 7 8 9 10 11 12
## 1959 1 2 3 4 5 6 7 8 9 10 11 12
## 1960 1 2 3 4 5 6 7 8 9 10 11 12
boxplot(AP~cycle(AP))Maine <- fread('data/timeseries/Maine.txt')
Maine## unemploy
## 1: 6.7
## 2: 6.7
## 3: 6.4
## 4: 5.9
## 5: 5.2
## ---
## 124: 4.6
## 125: 4.2
## 126: 4.4
## 127: 4.4
## 128: 3.9
str(Maine)## Classes 'data.table' and 'data.frame': 128 obs. of 1 variable:
## $ unemploy: num 6.7 6.7 6.4 5.9 5.2 4.8 4.8 4 4.2 4.4 ...
## - attr(*, ".internal.selfref")=<externalptr>
class(Maine)## [1] "data.table" "data.frame"
ts(data, start, end, frequency = 12)
data : 시계열 자료로 변환시키고자 하는 벡터
start : 자료의 시작점 지정
end : 자료의 끝점 지정(생략하면 모든 데이터 포함)
freq: 주기 지정. 월별 자료는 12
Maine.ts <- ts(Maine$unemploy, start = c(1996, 1), frequency = 12)
plot(Maine.ts)window(data, start, end)
data : 시계열 자료로 변환시키고자 하는 벡터
start : 자료의 시작점 지정
end : 자료의 끝점 지정(생략하면 모든 데이터 포함)
Maine.2000 <- window(Maine.ts, start = c(2000, 1))
plot(Maine.2000)cbe <- fread('data/timeseries/cbe.txt')
cbe## choc beer elec
## 1: 1451 96.3 1497
## 2: 2037 84.4 1463
## 3: 2477 91.2 1648
## 4: 2785 81.9 1595
## 5: 2994 80.5 1777
## ---
## 392: 8715 148.3 14338
## 393: 8450 133.5 12867
## 394: 9085 193.8 12761
## 395: 8350 208.4 12449
## 396: 7080 197.0 12658
choc.ts <- ts(cbe$choc, start = 1958, freq = 12)
beer.ts <- ts(cbe$beer, start = 1958, freq = 12)
elec.ts <- ts(cbe$elec, start = 1958, freq = 12)
plot(cbind(choc.ts, beer.ts, elec.ts), main = "")global <- fread('data/timeseries/global.txt')
global## V1 V2 V3 V4 V5
## 1: -0.384 -0.457 -0.673 -0.344 -0.311
## 2: -0.071 -0.246 -0.235 -0.380 -0.418
## 3: -0.670 -0.386 -0.437 -0.150 -0.528
## 4: -0.692 -0.629 -0.363 -0.375 -0.328
## 5: -0.495 -0.646 -0.754 -0.137 -0.452
## ---
## 356: 0.573 0.508 0.619 0.527 0.469
## 357: 0.295 0.358 0.364 0.436 0.452
## 358: 0.494 0.586 0.385 0.502 0.355
## 359: 0.512 0.553 0.494 0.516 0.537
## 360: 0.510 0.526 0.514 0.493 0.305
global_s <- scan('data/timeseries/global.txt')
global_s## [1] -0.384 -0.457 -0.673 -0.344 -0.311 -0.071 -0.246 -0.235 -0.380 -0.418
## [11] -0.670 -0.386 -0.437 -0.150 -0.528 -0.692 -0.629 -0.363 -0.375 -0.328
## [21] -0.495 -0.646 -0.754 -0.137 -0.452 -1.031 -0.643 -0.328 -0.311 -0.263
## [31] -0.248 -0.274 -0.203 -0.121 -0.913 -0.197 -0.249 -0.041 -0.082 -0.172
## [41] -0.085 -0.278 -0.220 -0.132 -0.436 -0.234 -0.288 -0.486 -0.070 -0.526
## [51] -0.599 -0.420 -0.273 -0.063 -0.182 -0.256 -0.213 -0.326 -0.696 -0.813
## [61] -0.858 -0.415 -0.431 -0.443 -0.735 -0.169 -0.227 -0.131 -0.377 -0.375
## [71] -0.434 -0.209 -0.711 -0.817 -0.435 -0.232 -0.194 -0.322 -0.466 -0.623
## [81] -0.345 -0.382 -0.932 -0.768 0.263 -0.063 -0.379 -0.187 -0.320 -0.365
## [91] -0.510 -0.359 -0.291 -0.431 -0.362 -0.276 -0.834 -0.604 -0.516 -0.482
## [101] -0.399 -0.200 -0.138 -0.332 -0.394 -0.711 -0.507 -0.587 -0.125 -0.615
## [111] -0.597 -0.135 -0.152 -0.227 -0.153 -0.267 0.002 -0.358 -0.200 -0.324
## [121] 0.095 -0.173 -0.396 -0.119 -0.533 0.078 -0.089 -0.282 -0.224 -0.364
## [131] -0.294 -0.368 -0.306 0.087 -0.665 -0.235 -0.452 -0.316 -0.267 -0.218
## [141] -0.073 -0.177 -0.317 -0.540 -0.718 -0.382 0.004 -0.281 -0.029 -0.113
## [151] 0.067 -0.135 -0.234 -0.247 -0.517 -0.098 -0.159 0.241 -0.602 -0.232
## [161] -0.210 -0.372 -0.295 -0.228 -0.252 -0.581 -0.489 -0.487 -0.072 -0.462
## [171] -0.426 -0.246 -0.260 -0.242 -0.110 -0.216 -0.248 -0.352 -0.133 -0.729
## [181] -0.349 -0.528 -0.058 -0.157 -0.215 -0.216 -0.129 -0.218 -0.437 -0.505
## [191] -0.613 -0.643 -0.378 -0.385 -0.469 -0.163 -0.109 -0.117 -0.206 -0.146
## [201] -0.171 -0.269 -0.359 -0.385 -0.090 -0.397 -0.365 -0.414 -0.433 -0.187
## [211] -0.138 -0.183 -0.302 -0.541 -0.508 -0.397 -0.033 -0.407 -0.569 -0.573
## [221] -0.470 -0.300 -0.111 -0.342 -0.248 -0.476 -0.564 -0.425 -0.665 -0.673
## [231] -0.561 -0.457 -0.099 -0.162 -0.330 -0.233 -0.390 -0.463 -0.533 -0.493
## [241] -0.320 -0.361 -0.451 -0.407 -0.548 -0.387 -0.272 -0.239 -0.456 -0.465
## [251] -0.739 -0.812 -0.401 -0.085 -0.310 -0.442 -0.530 -0.155 -0.136 -0.165
## [261] -0.121 -0.157 -0.186 0.142 0.009 0.204 0.341 0.159 -0.125 -0.066
## [271] -0.134 -0.116 -0.113 -0.169 -0.274 -0.430 -0.244 -0.254 -0.145 -0.330
## [281] -0.279 -0.222 -0.166 -0.298 -0.220 -0.217 -0.537 -0.555 -0.108 -0.264
## [291] -0.300 -0.217 -0.334 -0.382 -0.280 -0.112 -0.274 -0.434 -0.557 -0.280
## [301] -0.506 -0.354 -0.231 -0.174 -0.020 -0.211 -0.029 -0.104 -0.221 -0.351
## [311] -0.497 -0.246 -0.070 -0.123 -0.070 -0.276 -0.266 -0.321 -0.221 -0.166
## [321] -0.252 -0.365 -0.489 -0.530 -0.480 -0.382 -0.437 -0.327 -0.317 -0.043
## [331] -0.146 -0.241 -0.335 -0.435 -0.348 -0.315 -0.402 -0.300 -0.362 -0.444
## [341] -0.345 -0.344 -0.243 -0.198 -0.290 -0.300 -0.559 -0.384 -0.551 -0.481
## [351] -0.343 -0.426 -0.386 -0.432 -0.268 -0.301 -0.241 -0.309 -0.311 -0.150
## [361] -0.340 -0.490 -0.339 -0.140 -0.061 -0.236 -0.138 -0.134 -0.202 -0.341
## [371] -0.362 -0.290 -0.439 -0.483 -0.363 -0.426 -0.262 -0.302 -0.151 -0.278
## [381] -0.259 -0.465 -0.365 -0.357 -0.629 -0.512 -0.556 -0.281 -0.301 -0.234
## [391] -0.265 -0.260 -0.139 -0.114 -0.218 -0.234 -0.195 -0.144 -0.142 -0.120
## [401] -0.036 -0.088 -0.200 -0.266 -0.270 -0.351 -0.436 -0.185 -0.347 -0.308
## [411] -0.334 -0.315 -0.454 -0.398 -0.406 -0.406 -0.466 -0.472 -0.596 -0.423
## [421] -0.583 -0.555 -0.422 -0.364 -0.256 -0.285 -0.312 -0.258 -0.167 -0.319
## [431] -0.537 -0.163 -0.477 -0.099 -0.453 -0.481 -0.407 -0.250 -0.397 -0.351
## [441] -0.232 -0.439 -0.571 -0.746 -1.032 -0.803 -0.386 -0.538 -0.518 -0.342
## [451] -0.185 -0.314 -0.300 -0.259 -0.368 -0.329 -0.422 -0.333 -0.370 -0.407
## [461] -0.435 -0.467 -0.342 -0.358 -0.500 -0.417 -0.474 -0.408 -0.567 -0.688
## [471] -0.465 -0.356 -0.353 -0.291 -0.384 -0.257 -0.205 -0.245 -0.274 -0.267
## [481] -0.241 -0.200 -0.412 -0.358 -0.096 -0.133 -0.145 -0.148 -0.155 -0.099
## [491] -0.311 -0.091 -0.290 -0.168 -0.323 -0.095 -0.011 -0.076 -0.111 -0.119
## [501] -0.105 -0.149 -0.414 -0.350 -0.053 -0.297 -0.770 -0.396 -0.398 -0.205
## [511] -0.269 -0.255 -0.258 -0.446 -0.461 -0.265 -0.247 -0.505 -0.518 -0.294
## [521] -0.250 -0.291 -0.173 -0.151 -0.115 -0.135 0.054 -0.404 -0.302 -0.269
## [531] -0.339 -0.250 -0.159 -0.067 -0.153 -0.173 -0.140 -0.038 -0.312 -0.089
## [541] -0.134 -0.255 -0.256 -0.144 -0.178 -0.149 -0.236 -0.238 -0.314 -0.325
## [551] -0.426 -0.436 -0.083 -0.149 -0.321 -0.423 -0.344 -0.329 -0.298 -0.353
## [561] -0.368 -0.449 -0.522 -0.544 -0.321 -0.136 -0.421 -0.445 -0.441 -0.473
## [571] -0.459 -0.492 -0.524 -0.532 -0.533 -0.575 -0.626 -0.546 -0.649 -0.561
## [581] -0.452 -0.403 -0.416 -0.373 -0.358 -0.350 -0.266 -0.330 -0.470 -0.679
## [591] -0.503 -0.549 -0.342 -0.288 -0.280 -0.265 -0.251 -0.342 -0.238 -0.233
## [601] -0.171 -0.319 -0.375 -0.182 -0.251 -0.226 -0.308 -0.238 -0.318 -0.300
## [611] -0.503 -0.312 -0.531 -0.531 -0.426 -0.604 -0.661 -0.556 -0.478 -0.459
## [621] -0.382 -0.363 -0.595 -0.483 -0.432 -0.465 -0.635 -0.515 -0.523 -0.379
## [631] -0.387 -0.446 -0.386 -0.516 -0.571 -0.490 -0.538 -0.514 -0.635 -0.572
## [641] -0.529 -0.444 -0.493 -0.285 -0.276 -0.291 -0.287 -0.516 -0.317 -0.435
## [651] -0.441 -0.404 -0.447 -0.460 -0.331 -0.386 -0.372 -0.468 -0.585 -0.640
## [661] -0.520 -0.612 -0.643 -0.666 -0.500 -0.466 -0.383 -0.372 -0.346 -0.409
## [671] -0.352 -0.273 -0.354 -0.260 -0.457 -0.347 -0.354 -0.251 -0.414 -0.507
## [681] -0.494 -0.573 -0.461 -0.399 -0.379 -0.464 -0.602 -0.471 -0.520 -0.516
## [691] -0.370 -0.302 -0.346 -0.394 -0.230 -0.133 -0.063 -0.194 -0.330 -0.368
## [701] -0.259 -0.198 -0.183 -0.270 -0.299 -0.140 -0.328 -0.360 -0.190 -0.004
## [711] -0.408 -0.050 -0.218 -0.104 -0.032 -0.159 -0.085 -0.265 -0.169 -0.248
## [721] -0.242 -0.237 -0.529 -0.339 -0.374 -0.433 -0.300 -0.250 -0.275 -0.310
## [731] -0.510 -0.658 -0.514 -0.785 -0.788 -0.503 -0.748 -0.374 -0.252 -0.319
## [741] -0.150 -0.411 -0.353 -0.787 -0.540 -0.557 -0.537 -0.541 -0.598 -0.324
## [751] -0.393 -0.417 -0.344 -0.137 -0.231 -0.269 -0.198 -0.157 -0.355 -0.104
## [761] -0.295 -0.232 -0.254 -0.289 -0.171 -0.305 -0.634 -0.484 -0.246 -0.414
## [771] -0.196 -0.275 -0.176 -0.265 -0.333 -0.238 -0.175 -0.304 -0.412 -0.443
## [781] -0.105 -0.242 -0.221 -0.180 -0.131 -0.149 -0.116 -0.304 -0.265 -0.197
## [791] -0.438 -0.238 -0.370 -0.350 -0.336 -0.321 -0.332 -0.311 -0.285 -0.285
## [801] -0.310 -0.332 -0.299 -0.325 -0.195 -0.463 -0.354 -0.452 -0.354 -0.262
## [811] -0.382 -0.433 -0.303 -0.224 -0.061 -0.058 -0.320 -0.308 -0.320 -0.406
## [821] -0.322 -0.265 -0.274 -0.287 -0.319 -0.330 -0.391 -0.565 -0.402 -0.291
## [831] -0.256 -0.263 -0.293 -0.282 -0.265 -0.172 -0.244 -0.328 -0.151 0.015
## [841] 0.085 0.055 -0.011 -0.203 -0.235 -0.142 -0.195 -0.054 -0.122 -0.082
## [851] -0.215 -0.242 -0.256 -0.108 -0.347 -0.275 -0.278 -0.190 -0.135 -0.153
## [861] -0.114 -0.075 -0.205 -0.473 -0.128 -0.214 -0.400 -0.324 -0.259 -0.280
## [871] -0.187 -0.214 -0.170 -0.172 -0.133 -0.194 -0.430 -0.687 -0.388 -0.397
## [881] -0.389 -0.344 -0.349 -0.257 -0.260 -0.143 -0.093 -0.516 -0.354 -0.207
## [891] -0.188 -0.211 -0.233 -0.127 -0.105 -0.079 -0.130 -0.119 0.026 -0.095
## [901] 0.007 -0.240 -0.122 -0.161 -0.163 -0.016 0.019 -0.061 -0.097 -0.061
## [911] -0.179 -0.084 0.126 -0.234 -0.307 -0.110 -0.207 -0.139 -0.110 -0.136
## [921] -0.030 -0.083 -0.232 -0.188 -0.276 -0.320 -0.322 -0.218 -0.195 -0.159
## [931] -0.132 -0.101 -0.188 -0.147 -0.313 -0.489 -0.221 -0.151 -0.371 -0.267
## [941] -0.090 -0.034 -0.060 -0.046 -0.141 -0.073 -0.056 -0.118 -0.286 0.171
## [951] -0.204 -0.302 -0.285 -0.184 -0.123 -0.122 -0.151 -0.036 -0.311 -0.216
## [961] -0.232 -0.366 -0.239 -0.236 -0.127 -0.086 0.019 -0.014 -0.068 -0.032
## [971] -0.046 -0.006 -0.154 0.080 -0.247 -0.167 -0.056 0.019 0.030 0.108
## [981] 0.130 0.088 0.013 -0.128 0.032 0.062 0.147 0.150 0.053 0.032
## [991] 0.124 0.053 0.124 0.197 0.096 -0.187 -0.042 -0.030 -0.218 -0.116
## [1001] -0.035 0.094 0.028 0.044 -0.113 -0.207 -0.068 0.160 -0.402 -0.183
## [1011] -0.184 -0.100 -0.122 0.016 0.134 -0.070 0.033 -0.077 -0.103 0.084
## [1021] -0.040 0.037 -0.114 0.089 -0.050 0.054 0.066 0.028 -0.117 0.211
## [1031] 0.061 0.102 0.143 -0.104 -0.039 0.006 0.007 0.044 -0.084 -0.019
## [1041] 0.042 -0.018 -0.060 -0.124 -0.249 0.036 -0.221 -0.020 0.071 -0.063
## [1051] 0.082 -0.037 0.024 0.188 0.005 0.162 0.282 0.133 0.101 0.024
## [1061] 0.107 0.187 0.210 0.213 0.294 0.247 0.062 -0.011 0.036 0.037
## [1071] 0.003 0.161 -0.100 0.004 -0.018 0.301 0.107 0.134 -0.013 -0.189
## [1081] 0.084 0.094 -0.196 -0.002 -0.190 -0.241 -0.105 -0.197 -0.067 -0.041
## [1091] -0.127 -0.383 -0.148 -0.186 -0.097 -0.071 -0.129 -0.064 -0.094 -0.086
## [1101] -0.126 0.043 -0.014 -0.236 0.018 -0.147 -0.212 -0.091 0.005 -0.019
## [1111] -0.140 -0.092 -0.093 -0.033 -0.096 -0.205 0.107 -0.201 -0.187 -0.043
## [1121] -0.113 -0.150 -0.110 -0.043 -0.058 -0.060 -0.084 -0.230 -0.329 -0.274
## [1131] -0.157 -0.209 -0.148 -0.119 -0.139 -0.165 -0.135 -0.170 -0.403 -0.237
## [1141] -0.364 -0.462 -0.270 -0.157 -0.089 -0.053 -0.018 0.044 0.091 0.092
## [1151] -0.036 0.106 0.106 0.069 -0.121 0.021 -0.007 0.008 0.003 0.018
## [1161] 0.018 -0.056 -0.218 -0.086 0.051 0.122 0.107 0.112 0.021 0.030
## [1171] -0.039 0.015 0.047 0.056 -0.071 0.068 -0.221 -0.103 -0.186 -0.212
## [1181] -0.254 -0.158 -0.254 -0.146 -0.107 -0.112 -0.021 -0.247 0.053 -0.169
## [1191] -0.389 -0.290 -0.219 -0.188 -0.192 -0.075 -0.103 -0.115 -0.268 -0.320
## [1201] -0.239 -0.379 -0.323 -0.326 -0.323 -0.261 -0.203 -0.242 -0.268 -0.195
## [1211] -0.295 -0.248 -0.154 -0.103 -0.115 -0.066 0.013 0.050 -0.019 0.061
## [1221] 0.027 -0.002 0.083 0.168 0.275 0.174 0.065 0.046 0.045 0.001
## [1231] 0.049 0.017 -0.034 0.013 0.041 0.078 0.110 0.056 0.088 0.062
## [1241] -0.010 0.053 0.033 -0.011 0.032 -0.061 -0.138 -0.055 -0.020 0.158
## [1251] -0.296 -0.128 -0.133 0.009 0.006 0.009 0.042 -0.006 -0.118 0.136
## [1261] 0.028 0.134 0.055 0.069 0.077 0.066 -0.023 0.007 -0.023 -0.079
## [1271] -0.049 -0.092 0.031 0.087 0.016 -0.020 -0.067 -0.062 -0.022 -0.017
## [1281] -0.004 0.033 0.024 0.027 -0.042 0.166 -0.123 -0.053 -0.040 -0.021
## [1291] 0.061 0.100 0.085 0.155 0.110 0.016 -0.023 -0.158 -0.277 -0.246
## [1301] -0.175 -0.185 -0.172 -0.255 -0.283 -0.342 -0.297 -0.414 -0.195 -0.290
## [1311] -0.244 -0.273 -0.155 -0.129 -0.217 -0.127 -0.100 -0.055 -0.148 -0.078
## [1321] -0.097 -0.080 -0.066 -0.132 -0.120 0.017 0.005 -0.057 -0.018 -0.119
## [1331] -0.108 -0.218 -0.172 -0.269 -0.110 -0.094 0.019 -0.125 -0.112 -0.052
## [1341] -0.096 0.071 -0.107 -0.134 -0.272 -0.227 0.015 -0.192 -0.209 -0.095
## [1351] -0.068 -0.055 -0.053 -0.013 -0.050 -0.093 -0.178 -0.106 0.028 0.104
## [1361] 0.079 0.025 0.076 0.051 0.027 0.039 0.130 0.169 0.068 0.172
## [1371] -0.040 0.043 -0.042 -0.013 -0.067 -0.079 -0.051 -0.096 -0.070 -0.215
## [1381] -0.078 -0.309 -0.286 -0.236 -0.232 -0.246 -0.152 -0.129 -0.126 -0.148
## [1391] -0.126 -0.206 -0.350 -0.286 -0.154 -0.065 -0.038 0.022 0.013 0.069
## [1401] -0.002 0.052 0.016 0.173 0.187 0.263 0.237 0.137 0.129 0.128
## [1411] 0.074 0.028 -0.023 0.011 -0.071 -0.070 -0.301 -0.314 -0.194 -0.169
## [1421] -0.146 -0.111 -0.065 -0.073 -0.140 -0.172 -0.174 -0.218 -0.077 -0.111
## [1431] -0.092 -0.091 -0.038 -0.082 -0.082 -0.120 -0.066 -0.220 -0.264 -0.281
## [1441] -0.197 -0.278 -0.391 -0.153 -0.258 -0.148 -0.156 -0.165 -0.129 -0.294
## [1451] -0.166 -0.092 -0.112 0.088 0.140 0.122 0.060 0.123 0.051 0.019
## [1461] 0.067 0.017 0.140 -0.014 0.065 0.027 0.027 -0.030 -0.114 -0.130
## [1471] -0.031 -0.120 -0.045 -0.092 0.011 -0.041 0.014 -0.121 0.006 -0.042
## [1481] -0.023 0.078 0.036 0.114 0.128 0.159 0.138 0.334 0.129 0.148
## [1491] 0.047 0.138 0.203 0.126 0.067 0.056 0.040 0.032 0.162 0.056
## [1501] 0.304 0.202 0.227 0.159 0.067 0.077 0.042 0.090 0.072 0.035
## [1511] 0.074 0.254 -0.007 -0.025 -0.104 0.030 0.051 -0.021 -0.002 -0.011
## [1521] 0.077 0.039 -0.012 0.209 0.373 0.334 0.247 0.188 0.173 0.198
## [1531] 0.207 0.235 0.211 0.114 0.276 0.113 0.130 0.047 0.106 0.037
## [1541] 0.115 0.018 0.061 0.095 0.036 0.012 -0.078 -0.211 0.086 -0.093
## [1551] 0.010 -0.015 0.052 -0.033 -0.018 0.047 0.005 0.029 -0.016 0.086
## [1561] 0.198 0.185 0.120 0.109 0.102 0.116 0.055 0.053 0.076 0.077
## [1571] -0.029 0.084 0.189 0.339 0.070 0.182 0.199 0.219 0.330 0.270
## [1581] 0.330 0.264 0.252 0.418 0.427 0.297 0.373 0.316 0.248 0.251
## [1591] 0.200 0.220 0.166 0.185 0.038 0.143 0.061 0.159 0.155 0.131
## [1601] 0.107 0.148 0.208 0.206 0.198 0.225 0.132 0.243 0.237 0.283
## [1611] 0.534 0.355 0.282 0.271 0.253 0.291 0.218 0.351 0.360 0.260
## [1621] 0.301 0.385 0.220 0.376 0.296 0.341 0.289 0.222 0.210 0.173
## [1631] 0.107 0.095 0.349 0.306 0.245 0.143 0.144 0.130 0.020 0.035
## [1641] 0.004 -0.021 -0.064 0.097 0.321 0.296 0.262 0.227 0.223 0.169
## [1651] 0.136 0.111 0.051 0.137 0.017 0.194 0.222 -0.032 0.223 0.243
## [1661] 0.244 0.224 0.185 0.224 0.239 0.327 0.361 0.331 0.462 0.558
## [1671] 0.373 0.319 0.281 0.369 0.387 0.415 0.323 0.350 0.361 0.280
## [1681] 0.163 0.343 0.217 0.164 0.256 0.274 0.281 0.228 0.189 0.165
## [1691] 0.166 0.278 0.254 0.348 0.324 0.295 0.299 0.431 0.422 0.444
## [1701] 0.517 0.538 0.488 0.572 0.512 0.824 0.593 0.660 0.613 0.639
## [1711] 0.704 0.670 0.475 0.452 0.345 0.469 0.404 0.607 0.283 0.357
## [1721] 0.296 0.328 0.340 0.287 0.324 0.280 0.197 0.383 0.203 0.429
## [1731] 0.388 0.469 0.304 0.267 0.250 0.365 0.304 0.219 0.123 0.172
## [1741] 0.343 0.307 0.505 0.432 0.456 0.415 0.447 0.502 0.407 0.390
## [1751] 0.523 0.348 0.641 0.680 0.620 0.445 0.429 0.449 0.488 0.395
## [1761] 0.439 0.395 0.423 0.301 0.545 0.430 0.393 0.397 0.450 0.440
## [1771] 0.454 0.518 0.521 0.573 0.429 0.573 0.508 0.619 0.527 0.469
## [1781] 0.295 0.358 0.364 0.436 0.452 0.494 0.586 0.385 0.502 0.355
## [1791] 0.512 0.553 0.494 0.516 0.537 0.510 0.526 0.514 0.493 0.305
global_s %>% glimpse()## num [1:1800] -0.384 -0.457 -0.673 -0.344 -0.311 -0.071 -0.246 -0.235 -0.38 -0.418 ...
plot(global_s)global.ts <- ts(global_s, start = c(1856, 1), freq = 12)
plot(global.ts, main = "")new.time <- time(global.ts) # t+(i-1)/freq
plot(global.ts, main = "")
abline(lm(global.ts~new.time), col = "red")비정상 시계열, 차분 필요
library(forecast)
female <- scan('data/timeseries/female.txt')
female.ts <- ts(female)
plot(female.ts)ggtsdisplay(female.ts)ndiffs(female)## [1] 1
female_d <- diff(female.ts)
ggtsdisplay(female_d)fit <- Arima(female, order = c(0, 1, 0), include.drift = T)
checkresiduals(fit)##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0) with drift
## Q* = 7.1676, df = 9, p-value = 0.6197
##
## Model df: 1. Total lags used: 10
confint(Arima(female, order = c(1, 1, 0), include.drift = T)) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -0.09026514 0.2851598
## drift 1.85683163 6.5762383
confint(Arima(female, order = c(0, 1, 1), include.drift = T)) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ma1 -0.08681113 0.3124909
## drift 1.84910777 6.5857555
# ARIMA(0, 1, 0) 확정 auto.arima(female, stepwise = F)## Series: female
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 4.215
## s.e. 1.093
##
## sigma^2 estimated as 129: log likelihood=-411.34
## AIC=826.68 AICc=826.8 BIC=832.03
auto.arima(female, ic = 'bic', stepwise = F)## Series: female
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 4.215
## s.e. 1.093
##
## sigma^2 estimated as 129: log likelihood=-411.34
## AIC=826.68 AICc=826.8 BIC=832.03
fit <- Arima(female, order = c(0, 1, 0), include.drift = T)
plot(forecast(fit))summary(forecast(fit))##
## Forecast method: ARIMA(0,1,0) with drift
##
## Model Information:
## Series: female
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 4.215
## s.e. 1.093
##
## sigma^2 estimated as 129: log likelihood=-411.34
## AIC=826.68 AICc=826.8 BIC=832.03
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001960972 11.25385 8.279046 -0.1344574 2.231458 0.9030153
## ACF1
## Training set 0.09833436
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 109 671.2150 656.6571 685.7728 648.9507 693.4792
## 110 675.4299 654.8421 696.0178 643.9435 706.9163
## 111 679.6449 654.4300 704.8597 641.0820 718.2077
## 112 683.8598 654.7442 712.9754 639.3313 728.3883
## 113 688.0748 655.5225 720.6270 638.2904 737.8591
## 114 692.2897 656.6305 727.9489 637.7537 746.8258
## 115 696.5047 657.9883 735.0210 637.5990 755.4103
## 116 700.7196 659.5439 741.8953 637.7468 763.6924
## 117 704.9346 661.2611 748.6080 638.1418 771.7273
## 118 709.1495 663.1137 755.1854 638.7438 779.5553
tour <- scan('data/timeseries/tourist.txt')
tour.ts <- ts(tour, start = 1981, frequency = 12)정상성 확인 2가지
plot(tour.ts)분산 안정화 변환
lntour <- log(tour.ts)
plot(lntour)일반 차분, 계절 차분 필요
Acf(lntour, lag.max = 48)ndiffs(lntour)## [1] 1
nsdiffs(lntour)## [1] 1
ggseasonplot(lntour)추가적으로 계절 차분 필요
tour_1 <- diff(lntour)
ggtsdisplay(tour_1, lag.max = 48)추가적인 일반 차분이 필요해보임
tour_12 <- diff(lntour, lag = 12)
ggtsdisplay(tour_12, lag.max = 48)ndiffs(tour_12)## [1] 1
d = 1, D = 1로 결정
tour_1_12 <- diff(tour_1, lag = 12)
ggtsdisplay(tour_1_12, lag.max = 48)Acf(tour_1_12, lag.max = 48)Pacf(tour_1_12, lag.max = 48)계절형 확인
비계절형 확인
ACF 절단, PACF 감소 : p = 0, q = 1
ACF 감소, PACF 절단 : p = 2, q = 0
최종 후보 모형
\(ARIMA(0, 1, 1)(0, 1, 1)_{12}\)
\(ARIMA(0, 1, 1)(1, 1, 0)_{12}\)
\(ARIMA(2, 1, 0)(0, 1, 1)_{12}\)
\(ARIMA(2, 1, 0)(1, 1, 0)_{12}\)
fit1 <- Arima(lntour, order = c(0, 1, 1),
seasonal = list(order = c(0, 1, 1), period = 12))
confint(fit1) # 모든 모수 유의 ## 2.5 % 97.5 %
## ma1 -0.7168817 -0.4432093
## sma1 -0.7075792 -0.3973742
checkresiduals(fit1)##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(0,1,1)[12]
## Q* = 26.06, df = 22, p-value = 0.2491
##
## Model df: 2. Total lags used: 24
과대 적합 확인
\(ARIMA(0, 1, 1)(0, 1, 1)_{12}\)
fit1_1 <- Arima(lntour, order = c(1, 1, 1),
seasonal = list(order = c(0, 1, 1), period = 12))
fit1_2 <- Arima(lntour, order = c(0, 1, 2),
seasonal = list(order = c(0, 1, 1), period = 12))
confint(fit1_1) # 추가된 모수 유의적 ## 2.5 % 97.5 %
## ar1 -0.6141189 -0.07646367
## ma1 -0.6019970 -0.07213300
## sma1 -0.6757900 -0.36348852
confint(fit1_2) # 추가된 모수 유의적## 2.5 % 97.5 %
## ma1 -0.84863210 -0.5006825
## ma2 0.04584469 0.4197128
## sma1 -0.67412800 -0.3580535
checkresiduals(fit1_1)##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(0,1,1)[12]
## Q* = 19.807, df = 21, p-value = 0.5335
##
## Model df: 3. Total lags used: 24
checkresiduals(fit1_2)##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2)(0,1,1)[12]
## Q* = 17.468, df = 21, p-value = 0.6824
##
## Model df: 3. Total lags used: 24
confint(Arima(lntour, order = c(1, 1, 2),
seasonal = list(order = c(0, 1, 1), period = 12))) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -0.7576853 0.48124313
## ma1 -1.1513542 0.05538992
## ma2 -0.2314544 0.55715140
## sma1 -0.6724509 -0.35743032
confint(Arima(lntour, order = c(2, 1, 1),
seasonal = list(order = c(0, 1, 1), period = 12))) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -1.4005757 0.05563757
## ar2 -0.6505574 0.21547715
## ma1 -0.7633223 0.73372547
## sma1 -0.6770033 -0.36266535
confint(Arima(lntour, order = c(0, 1, 3),
seasonal = list(order = c(0, 1, 1), period = 12))) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ma1 -0.88113628 -0.5041239
## ma2 0.03417003 0.5055626
## ma3 -0.24727265 0.1398392
## sma1 -0.67243530 -0.3582209
# fit1_1, fit1_2 예측에 사용 가능 fit2 <- Arima(lntour, order = c(0, 1, 1),
seasonal = list(order = c(1, 1, 0), period = 12))
checkresiduals(fit2) # 백색잡음 오차 가정 위반 ##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(1,1,0)[12]
## Q* = 38.217, df = 22, p-value = 0.01732
##
## Model df: 2. Total lags used: 24
과대 적합
\(ARIMA(0, 1, 1)(1, 1, 0)_{12}\)
fit2_1 <- Arima(lntour, order = c(1, 1, 1),
seasonal = list(order = c(1, 1, 0), period = 12))
fit2_2 <- Arima(lntour, order = c(0, 1, 2),
seasonal = list(order = c(1, 1, 0), period = 12))
confint(fit2_1) # 추가된 모수 유의 ## 2.5 % 97.5 %
## ar1 -0.6244618 -0.1257088
## ma1 -0.6016499 -0.1026214
## sar1 -0.6862031 -0.3682476
confint(fit2_2) # 추가된 모수 유의## 2.5 % 97.5 %
## ma1 -0.88940432 -0.5453766
## ma2 0.08103535 0.4425904
## sar1 -0.68381121 -0.3633908
checkresiduals(fit2_1) # 가정 만족##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(1,1,0)[12]
## Q* = 24.433, df = 21, p-value = 0.2726
##
## Model df: 3. Total lags used: 24
checkresiduals(fit2_2) # 가정 만족 ##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2)(1,1,0)[12]
## Q* = 22.927, df = 21, p-value = 0.3479
##
## Model df: 3. Total lags used: 24
추가 과대 적합
\(ARIMA(1, 1, 1)(1, 1, 0)_{12}\), \(ARIMA(0, 1, 2)(1, 1, 0)_{12}\)
confint(Arima(lntour, order = c(1, 1, 2),
seasonal = list(order = c(1, 1, 0), period = 12))) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -0.7327746 0.4113774
## ma1 -1.1291821 -0.0140854
## ma2 -0.2036577 0.5600746
## sar1 -0.6839650 -0.3642105
confint(Arima(lntour, order = c(2, 1, 1),
seasonal = list(order = c(1, 1, 0), period = 12))) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -1.3501035 0.003178749
## ar2 -0.6346756 0.214711438
## ma1 -0.7557297 0.634389249
## sar1 -0.6863941 -0.367535856
confint(Arima(lntour, order = c(0, 1, 3),
seasonal = list(order = c(1, 1, 0), period = 12))) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ma1 -0.92461468 -0.5514994
## ma2 0.07239749 0.5423576
## ma3 -0.25181127 0.1276558
## sar1 -0.68456703 -0.3650019
# fit2_1, fit2_2 잠정 후보 모형 fit3 <- Arima(lntour, order = c(2, 1, 0),
seasonal = list(order = c(0, 1, 1), period = 12))
checkresiduals(fit3) ##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0)(0,1,1)[12]
## Q* = 18.225, df = 21, p-value = 0.6348
##
## Model df: 3. Total lags used: 24
과대 적합
confint(Arima(lntour, order = c(2, 1, 1),
seasonal = list(order = c(0, 1, 1), period = 12))) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -1.4005757 0.05563757
## ar2 -0.6505574 0.21547715
## ma1 -0.7633223 0.73372547
## sma1 -0.6770033 -0.36266535
confint(Arima(lntour, order = c(3, 1, 0),
seasonal = list(order = c(0, 1, 1), period = 12))) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -0.8719017 -0.50296810
## ar2 -0.4487084 -0.00763568
## ar3 -0.1888452 0.18083322
## sma1 -0.6769962 -0.36266169
# fit3 잠정 후보 모형 fit4 <- Arima(lntour, order = c(2, 1, 0),
seasonal = list(order = c(1, 1, 0), period = 12))
checkresiduals(fit4) ##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0)(1,1,0)[12]
## Q* = 24.531, df = 21, p-value = 0.268
##
## Model df: 3. Total lags used: 24
confint(Arima(lntour, order = c(2, 1, 1),
seasonal = list(order = c(1, 1, 0), period = 12))) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -1.3501035 0.003178749
## ar2 -0.6346756 0.214711438
## ma1 -0.7557297 0.634389249
## sar1 -0.6863941 -0.367535856
confint(Arima(lntour, order = c(3, 1, 0),
seasonal = list(order = c(1, 1, 0), period = 12))) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -0.9181728 -0.55167096
## ar2 -0.4827245 -0.03043428
## ar3 -0.2034879 0.16766886
## sar1 -0.6863178 -0.36759502
# fit4 잠정 후보 모형 최종모형 : \(ARIMA(2, 1, 0)(1, 1, 0)_{12}\)
c(fit1_1$aic, fit1_1$bic)## [1] -335.1006 -323.9841
c(fit1_2$aic, fit1_2$bic)## [1] -335.4349 -324.3184
c(fit2_1$aic, fit2_1$bic)## [1] -337.3768 -326.2603
c(fit2_2$aic, fit2_2$bic)## [1] -337.7425 -326.6260
c(fit3$aic, fit3$bic)## [1] -335.8118 -324.6953
c(fit4$aic, fit4$bic)## [1] -338.0812 -326.9647
# fit4 최종 모형으로 선택 로그 변환된 자료에 대한 예측
fit4 <- Arima(lntour, order = c(2, 1, 0),
seasonal = list(order = c(1, 1, 0), period = 12))
plot(forecast(fit4, h = 12))원자료에 대한 예측
fit4_1 <- Arima(tour.ts, order = c(2, 1, 0),
seasonal = list(order = c(1, 1, 0), period = 12), lambda = 0)
plot(forecast(fit4_1, h = 12))실제값과 예측값 비교
tour92 <- scan('data/timeseries/tour92.txt')
tour92 <- ts(tour92, start = 1992, freq = 12)
fore_arima <- forecast(fit4_1, h = 12, level = 95)
plot(fore_arima)
new_t <- seq(1992, by = 1/12, length = 12)
lines(new_t, tour92, col = 'red')plot(fore_arima, xlim =c(1992,1993), ylim =c(2e+5,4.5e+5))
lines(new_t, tour92, col = "red", lwd = 2)
legend("topleft", c("observed data", "forecast"), lwd = 2, col = c("red" ,"blue"), bty = 'n')예측 정확성 비교
accuracy(fore_arima, tour92)## ME RMSE MAE MPE MAPE MASE
## Training set -101.7388 9382.833 6691.828 -0.1666852 4.06514 0.3697756
## Test set -44218.3189 51964.159 44218.319 -16.5746164 16.57462 2.4434060
## ACF1 Theil's U
## Training set 0.1129040 NA
## Test set 0.6654927 1.920409
tour <- scan('data/timeseries/tourist.txt')
tour.ts <- ts(tour, start = 1981, frequency = 12)
tour92 <- scan('data/timeseries/tour92.txt')
tour92 <- ts(tour92, start = 1992, freq = 12)
lntour <- log(tour.ts)2차 추세모형 or 강한 상관관계
Time <- time(lntour)
Month <- cycle(lntour)
fit1 <- lm(lntour~Time + factor(Month) + 0)
summary(fit1)##
## Call:
## lm(formula = lntour ~ Time + factor(Month) + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16248 -0.06957 -0.01932 0.07319 0.22759
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Time 0.1195 0.0025 47.78 <2e-16 ***
## factor(Month)1 -225.6597 4.9657 -45.44 <2e-16 ***
## factor(Month)2 -225.6373 4.9659 -45.44 <2e-16 ***
## factor(Month)3 -225.4362 4.9661 -45.40 <2e-16 ***
## factor(Month)4 -225.3477 4.9663 -45.38 <2e-16 ***
## factor(Month)5 -225.3286 4.9665 -45.37 <2e-16 ***
## factor(Month)6 -225.3848 4.9667 -45.38 <2e-16 ***
## factor(Month)7 -225.3999 4.9669 -45.38 <2e-16 ***
## factor(Month)8 -225.3277 4.9671 -45.36 <2e-16 ***
## factor(Month)9 -225.3804 4.9673 -45.37 <2e-16 ***
## factor(Month)10 -225.2771 4.9675 -45.35 <2e-16 ***
## factor(Month)11 -225.4349 4.9677 -45.38 <2e-16 ***
## factor(Month)12 -225.6338 4.9679 -45.42 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09084 on 119 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 1.741e+05 on 13 and 119 DF, p-value: < 2.2e-16
checkresiduals(fit1)##
## Breusch-Godfrey test for serial correlation of order up to 16
##
## data: Residuals
## LM test = 87.23, df = 16, p-value = 8.074e-12
양의 상관관계 존재
fit2 <- lm(lntour~Time + I(Time^2) + factor(Month) + 0)
checkresiduals(fit2)##
## Breusch-Godfrey test for serial correlation of order up to 17
##
## data: Residuals
## LM test = 69.78, df = 17, p-value = 2.354e-08
AR(3) 식별
Resid <- fit2$residuals
ggtsdisplay(Resid, lag.max = 48)fit_r1 <- Arima(Resid, order = c(3, 0, 0), include.mean = FALSE)
confint(fit_r1) ## 2.5 % 97.5 %
## ar1 0.14262044 0.4814755
## ar2 0.09449107 0.4352225
## ar3 0.03698488 0.3775787
checkresiduals(fit_r1)##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,0,0) with zero mean
## Q* = 6.0871, df = 7, p-value = 0.5296
##
## Model df: 3. Total lags used: 10
confint(Arima(Resid, order = c(3, 0, 1), include.mean = FALSE)) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -0.55000199 0.7343275
## ar2 0.04907373 0.6498214
## ar3 0.03680223 0.5309914
## ma1 -0.43556251 0.8971931
confint(Arima(Resid, order = c(4, 0, 0), include.mean = FALSE)) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 0.15430624 0.5022553
## ar2 0.10729112 0.4628918
## ar3 0.04984801 0.4040409
## ar4 -0.24959477 0.1091437
역행렬 계산 안될 경우 1차 추세모형 대신 시도
fit_x <- model.matrix(fit2)
f1 <- Arima(tour.ts, order = c(3, 0, 0), include.mean = FALSE, xreg = fit_x, lambda = 0)## Error in solve.default(res$hessian * n.used, A): system is computationally singular: reciprocal condition number = 5.58217e-17
Resid <- fit1$residuals
ggtsdisplay(Resid, lag.max = 48) # AR(3)\(AR(3)\) 모형 확정
fit_r2 <- Arima(Resid, order = c(3, 0, 0), include.mean = FALSE)
confint(fit_r2)## 2.5 % 97.5 %
## ar1 0.17792211 0.5181811
## ar2 0.12261593 0.4656961
## ar3 0.06598099 0.4075507
checkresiduals(fit_r2)##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,0,0) with zero mean
## Q* = 6.0104, df = 7, p-value = 0.5385
##
## Model df: 3. Total lags used: 10
confint(Arima(Resid, order = c(3, 0, 1), include.mean = FALSE)) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 -0.42304400 0.8008538
## ar2 0.04529015 0.6846541
## ar3 0.03132570 0.5676929
## ma1 -0.46971226 0.8080546
confint(Arima(Resid, order = c(4, 0, 0), include.mean = FALSE)) # 추가된 모수 비유의 ## 2.5 % 97.5 %
## ar1 0.18467678 0.5365285
## ar2 0.12910312 0.4899985
## ar3 0.07264062 0.4313678
## ar4 -0.23078402 0.1314767
fit_x <- model.matrix(fit1)
f1 <- Arima(tour.ts, order = c(3, 0, 0), include.mean = FALSE, xreg = fit_x, lambda = 0)new.t <- time(ts(start = c(1992, 1), end = c(1992, 12), freq = 12))
new.x <- cbind(new.t, diag(rep(1, 12)))
fore_reg <- forecast(f1, xreg = new.x, level = 95)## Warning in forecast.forecast_ARIMA(f1, xreg = new.x, level = 95): xreg contains
## different column names from the xreg used in training. Please check that the
## regressors are in the same order.
plot(fore_reg)fit4_1 <- Arima(tour.ts, order = c(2, 1, 0), seasonal = list(order = c(1, 1, 0), period = 12),
lambda = 0)
fore_arima <- forecast(fit4_1, h = 12, level = 95)accuracy(fore_arima, tour92)## ME RMSE MAE MPE MAPE MASE
## Training set -101.7388 9382.833 6691.828 -0.1666852 4.06514 0.3697756
## Test set -44218.3189 51964.159 44218.319 -16.5746164 16.57462 2.4434060
## ACF1 Theil's U
## Training set 0.1129040 NA
## Test set 0.6654927 1.920409
accuracy(fore_reg, tour92)## ME RMSE MAE MPE MAPE MASE
## Training set 120.1524 9569.667 6442.998 -0.3146017 4.019269 0.3560258
## Test set -53052.9409 61306.604 54979.697 -19.5490298 20.273884 3.0380559
## ACF1 Theil's U
## Training set 0.1121612 NA
## Test set 0.5282178 2.260151